*National occupational score distribution, in states that have at least one normal school and asylum
*Exclude people living in group quarters
* Load the 1920 file, replacing whatever data are currently in memory
use "1920_full.dta", clear

* Exclude group quarters: drop records whose gq code is 3 or 4
* (records with missing gq are kept, as before)
drop if inlist(gq, 3, 4)

* For each score variable listed, take the larger of the mother's (_mom) and
* father's (_pop) value. max() ignores missing arguments, so the result is
* missing only when both parents' values are missing.
* The loop variable is used inside the body so that adding another variable
* to the list produces that variable's own parental maximum.
local vars occscore
foreach x of local vars {
    gen max`x'parent = max(`x'_mom, `x'_pop)
}

* occ1950 code of the parent whose occscore equals the parental maximum.
* The second statement runs last, so when both parents have the same score
* the _pop occupation is the one kept.
gen occmaxoccscore = occ1950_mom if occscore_mom==maxoccscoreparent
replace occmaxoccscore = occ1950_pop if occscore_pop==maxoccscoreparent

* Keep only the variables used below
keep histid age sex maxoccscoreparent race stateicp countyicp occmaxoccscore occ1950_mom occ1950_pop

* Rename the variables to indicate the year they are from (append 1920).
* Uses foreach rather than the out-of-date -for- command.
foreach v of varlist histid age sex maxoccscoreparent race stateicp countyicp occmaxoccscore occ1950_mom occ1950_pop {
    rename `v' `v'1920
}


* Attach the Eckert crosswalk (EGLP_1920.dta) to the census records.
* The join key is the ICPSR state/county pair, so first give the census
* codes the names used as the key: icpsrst and icpsrcty.
rename countyicp1920 icpsrcty
rename stateicp1920  icpsrst

* joinby forms all pairwise combinations within each icpsrst-icpsrcty group.
* unmatched(both) also keeps records found in only one of the two files and
* records the source in _merge.
joinby icpsrst icpsrcty using "EGLP_1920.dta", unmatched(both)

* Give the match indicator a descriptive name
rename _merge merge1920crosswalk

* ---- Williamsburg City, VA (ICPSR 40 8300) ----
* The census code 40 8300 has no match in the 1920 crosswalk (the crosswalk has
* an observation for Williamsburg City, but its ICPSR code is missing).
* We know it becomes 510 8300 in the 1990 NHGIS codes, so fill that in.
* It does not crosswalk to any other county and is not a normal/asylum county,
* so the main code needs no adjustment (it is dropped there); it is only
* needed here for the national distribution.
replace nhgisst_1990  = 510  if icpsrst==40 & icpsrcty==8300 & nhgisst_1990==.
replace nhgiscty_1990 = 8300 if icpsrst==40 & icpsrcty==8300 & nhgiscty_1990==.

* ---- Hampton County, VA (ICPSR 40 6500) ----
* The crosswalk lists this as Northampton (NHGIS 510 1310) and Southampton
* (NHGIS 510 1750). Since the county itself is not used, just pick one
* (Southampton, 510 1750) for the people in IPUMS.
* Neither Northampton nor Southampton is a normal/asylum county, nor do they
* crosswalk to normal/asylum counties, so the main code needs no adjustment
* (they are dropped there); they are only needed here for the national
* distribution.
replace nhgisst_1990  = 510  if icpsrst==40 & icpsrcty==6500 & nhgisst_1990==.
replace nhgiscty_1990 = 1750 if icpsrst==40 & icpsrcty==6500 & nhgiscty_1990==.

* The remaining counties with merge = 1 are in Nevada (which gets dropped
* because the state does not have at least one normal school and one asylum
* county) and in Oregon.

* ---- Union County, OR (ICPSR 72 0605) ----
* The only Oregon county that does not merge is ICPSR 72 0605, which is listed
* as Union County in the IPUMS ICPSR codes. In the crosswalk Union County is
* 610, but in IPUMS 610 is Umpqua.
* A very small fraction of Union County (610) becomes part of 72 590
* (Umatilla), which is a normal school county, but the weight is
* .00000000333, so it is rounded to zero and the main code needs no adjustment
* (Union County is also not a normal/asylum county).
* We still want the 72 605 people in IPUMS in the national distribution, so
* set their NHGIS codes to the crosswalk codes (410 610).
replace nhgisst_1990  = 410 if icpsrst==72 & icpsrcty==605 & nhgisst_1990==.
replace nhgiscty_1990 = 610 if icpsrst==72 & icpsrcty==605 & nhgiscty_1990==.

* County identifier built from the 1990 NHGIS codes:
* state code times 100 plus county code divided by 10
* (e.g. 510 and 8300 give 51830)
gen cty_fips1920 = 100*nhgisst_1990 + nhgiscty_1990/10

* Crosswalk-only rows (no census records attached) are not needed
drop if merge1920crosswalk==2

* Census-only rows that still have no county identifier after the manual
* fill-ins. These should be some counties in Nevada, which is not in our
* sample, so dropping them is okay.
drop if merge1920crosswalk==1 & cty_fips1920==.
* Remove the crosswalk columns that are no longer needed
drop year icpsrst icpsrcty icpsrst_1990 icpsrcty_1990 ///
     nhgisst nhgiscty nhgisst_1990 nhgiscty_1990 ///
     statenam nhgisnam statenam_1990 nhgisnam_1990 ///
     area area_base us_state

* State code = the county identifier without its last three digits.
* Work on the string form: a 4-character id has a 1-digit state code and a
* 5-character id has a 2-digit state code; any other length is left blank
* and becomes missing after destring.
tostring cty_fips1920, gen(cty_fipstr)
gen statefip1920 = substr(cty_fipstr, 1, length(cty_fipstr) - 3) if inlist(length(cty_fipstr), 4, 5)
destring statefip1920, replace

*Drop DE, GA, RI, SC, NV, WY, AZ, NM, AK, HI because they don't have at least one normal school county and one asylum county
*(state FIPS codes: AK=2, AZ=4, DE=10, GA=13, HI=15, NV=32, NM=35, RI=44, SC=45, WY=56)
*The variable is written out in full (statefip1920) so the line does not depend on variable abbreviation;
*records with a missing state code are not dropped here.

drop if inlist(statefip1920, 2, 4, 10, 13, 15, 32, 35, 44, 45, 56)

* 0/1 indicators; left missing when the underlying variable is missing (.)
gen male1920  = (sex1920==1)  if sex1920~=.
gen white1920 = (race1920==1) if race1920~=.

* 1 when at least one parent's occ1950 is non-missing, 0 when both are missing
gen occparentsnonmiss = !(occ1950_pop1920==. & occ1950_mom1920==.)

* Only keep those living with at least one of their parents, to avoid
* picking up people who traveled for school or an asylum
drop if occparentsnonmiss~=1

* Keep a single record per person: tag() marks exactly one observation
* for each histid1920 value, and everything untagged is dropped
egen taghistid = tag(histid1920)
drop if taghistid~=1
* For each listed score: median within each white1920 group, plus a
* below-median indicator
local vars maxoccscoreparent1920
foreach score of local vars {
    * Median of the score within each value of white1920
    bysort white1920: egen med`score' = median(`score')

    * 1 if the score is strictly below its group median, 0 otherwise;
    * missing when the score itself is missing
    gen bmed`score' = (`score' < med`score') if `score'~=.

    * Show the group medians against white1920
    tab med`score' white1920
}

* Median of the parental maximum occscore over everyone remaining in the sample
egen medmaxoccscoreparent1920all = median(maxoccscoreparent1920)
tab medmaxoccscoreparent1920all

* Distribution of the score among whites
tab maxoccscoreparent1920 if white1920==1

* Occupations of the higher-scoring parent among whites with a score of 20 or below, most frequent first.
* The variable is written out in full (occmaxoccscore1920) so the line does not depend on variable abbreviation.
tab occmaxoccscore1920 if white1920==1 & maxoccscoreparent1920<=20, sort

* Same median and tabulation with state code 11 (DC) excluded.
* preserve/restore brings the full data back afterwards, so the variable
* created inside this section does not persist.
* The state variable is written out in full (statefip1920) so the drop does
* not depend on variable abbreviation.
preserve
drop if statefip1920==11
egen medmaxoccscoreparent1920nodc = median(maxoccscoreparent1920)
tab medmaxoccscoreparent1920nodc
tab maxoccscoreparent1920 if white1920==1
restore


